/*1 May 2013Julia Goldberg, Ellen Moscoe, Iryna Postolovska"Estimating HIV Prevalence: Can Heckman Help?"This do file is intended to replicate the results from our paper.The original source for cleaning the data and reproducing the model from Barnighausen et al. (2011) can be found at: http://hdl.handle.net/1902.1/17657We are grateful to the authors for making their code publicly available through the Harvard Institute for Quantitative Social Science dataverse.Before you attempt to run the code, you will need to download the Zambia 2007 DHS data (household, female, male, and HIV components).To download the DHS data, please visit: http://www.measuredhs.com/Please refer to DHS codebooks for variable descriptions after obtaining data (visit http://www.measuredhs.com/data/dataset/Zambia_Standard-DHS_2007.cfm)*/*****************************************************************************************************************For the additional models, create variables for age-squared and male circumcisiongen agesq=age^2gen circumcised=mv483 if mv483!=9*Barnighausen et al. modelglobal ind_vars_m agecat5 education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity languageglobal reg_ind_vars_m i.agecat5 education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language*Age-squaredglobal ind_vars_m_agesq age agesq education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity languageglobal reg_ind_vars_m_agesq age agesq education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language	  *Circumcisedglobal ind_vars_m_cm agecat5 education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity language circumcisedglobal reg_ind_vars_m_cm i.agecat5 education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language i.circumcised*Age-squared + circumcisedglobal ind_vars_m_agesq_cm age agesq education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity language circumcisedglobal reg_ind_vars_m_agesq_cm age agesq education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language i.circumcised*Women individual questionnaire (for Barnighausen et al. and circumcision models)global ind_vars_w agecat5 education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity languageglobal reg_ind_vars_w i.agecat5 education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language *Women - age squaredglobal ind_vars_w_agesq age agesq education wealthcat location region marital  ///	  std age1sex_cat highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity language global reg_ind_vars_w_agesq age agesq education i.wealthcat i.location i.region i.marital  ///	  i.std i.age1sex_cat i.highhiv i.partner i.condom i.aidscare i.knowsdiedofaids i.evertestedHIV ///	  i.smoke i.alcohol i.religion i.ethnicity i.language *Household questionnaireglobal hh_vars agecat5 education wealthcat region locationglobal reg_hh_vars i.agecat5 education i.wealthcat i.region i.location*Household questionnaire with agesqglobal hh_vars_agesq age agesq education wealthcat region locationglobal reg_hh_vars_agesq age agesq education i.wealthcat i.region i.location*****************************************************************************************************************BARNIGHAUSEN ET AL.(2011) MODEL*Men Consent*Selection model xi: heckprob hiv $reg_ind_vars_m, sel(hivconsent = $reg_ind_vars_m i.interviewerID_50m) cluster(hv001) *After running the model, make conditional prediction: prob(HIV+ | No_Participation) = prob(HIV+ and No_Participation) / prob(No_Participation) predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen m_consent_p1_0 = p10/(p10+p00) if predGroup==1    /*The conditional prediction of HIV status among those without test*/ sum m_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00*Men Contact*Selection model xi:heckprob hiv $reg_hh_vars, sel(hivconsent = $reg_hh_vars firstday i.hhinterviewerID_50m) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen m_contact_p1_0 = p10/(p10+p00) if predGroup==2 su m_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Women Selection model xi: heckprob hiv $reg_ind_vars_w, sel(hivconsent = $reg_ind_vars_w i.interviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen w_consent_p1_0 = p10/(p10+p00) if predGroup==1 su w_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00** Women contact *Selection model xi: heckprob hiv $reg_hh_vars, sel(hivconsent = $reg_hh_vars firstday i.hhinterviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen w_contact_p1_0 = p10/(p10+p00) if predGroup==2 su w_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Estimate nationally representative prevalence svyset hv001 [pweight=hhweight], strata(stratum)**Menreplace hiv = m_consent_p1_0 if sex==1 & predGroup==1replace hiv = m_contact_p1_0 if sex==1 & predGroup==2svy: mean hiv, subpop(if sex==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2)   /*Those who were not contacted*/**Womenreplace hiv = w_consent_p1_0 if sex==2 & predGroup==1replace hiv = w_contact_p1_0 if sex==2 & predGroup==2svy: mean hiv, subpop(if sex==2)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2)   /*Those who were not contacted*/***NEVER HAD SEX TABLE 2*****Estimate HIV prevalence among those that never had sex**Mensvy: mean hiv, subpop(if sex==1 & age1sex_cat==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0 & age1sex_cat==1)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1 & age1sex_cat==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2 & age1sex_cat==1)   /*Those who were not contacted*/**Womensvy: mean hiv, subpop(if sex==2 & age1sex_cat==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0 & age1sex_cat==1)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1 & age1sex_cat==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2 & age1sex_cat==1)   /*Those who were not contacted*/*****************************************************************************************************************AGE SQUARED*Men Consent*Selection model xi: heckprob hiv $reg_ind_vars_m_agesq, sel(hivconsent = $reg_ind_vars_m_agesq i.interviewerID_50m) cluster(hv001) *After running the model, make conditional prediction: prob(HIV+ | No_Participation) = prob(HIV+ and No_Participation) / prob(No_Participation) predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen m_consent_p1_0 = p10/(p10+p00) if predGroup==1    /*The conditional prediction of HIV status among those without test*/ sum m_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00*Men Contact*Selection model xi:heckprob hiv $reg_hh_vars_agesq, sel(hivconsent = $reg_hh_vars_agesq firstday i.hhinterviewerID_50m) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen m_contact_p1_0 = p10/(p10+p00) if predGroup==2 su m_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Women Selection model xi: heckprob hiv $reg_ind_vars_w_agesq, sel(hivconsent = $reg_ind_vars_w_agesq i.interviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen w_consent_p1_0 = p10/(p10+p00) if predGroup==1 su w_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00** Women contact *Selection model xi: heckprob hiv $reg_hh_vars_agesq, sel(hivconsent = $reg_hh_vars_agesq firstday i.hhinterviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen w_contact_p1_0 = p10/(p10+p00) if predGroup==2 su w_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Estimate nationally representative HIV prevalencesvyset hv001 [pweight=hhweight], strata(stratum)**Menreplace hiv = m_consent_p1_0 if sex==1 & predGroup==1replace hiv = m_contact_p1_0 if sex==1 & predGroup==2svy: mean hiv, subpop(if sex==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2)   /*Those who were not contacted*/**Womenreplace hiv = w_consent_p1_0 if sex==2 & predGroup==1replace hiv = w_contact_p1_0 if sex==2 & predGroup==2svy: mean hiv, subpop(if sex==2)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2)   /*Those who were not contacted*/svy: mean hiv***NEVER HAD SEX TABLE 2******Mensvy: mean hiv, subpop(if sex==1 & age1sex_cat==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0 & age1sex_cat==1)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1 & age1sex_cat==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2 & age1sex_cat==1)   /*Those who were not contacted*/**Womensvy: mean hiv, subpop(if sex==2 & age1sex_cat==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0 & age1sex_cat==1)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1 & age1sex_cat==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2 & age1sex_cat==1)   /*Those who were not contacted*/*****************************************************************************************************************CIRCUMCISION*Men Consent*Selection model xi: heckprob hiv $reg_ind_vars_m_cm, sel(hivconsent = $reg_ind_vars_m_cm i.interviewerID_50m) cluster(hv001) *After running the model, make conditional prediction: prob(HIV+ | No_Participation) = prob(HIV+ and No_Participation) / prob(No_Participation) predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen m_consent_p1_0 = p10/(p10+p00) if predGroup==1    /*The conditional prediction of HIV status among those without test*/ sum m_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00*Men Contact*Selection model xi:heckprob hiv $reg_hh_vars, sel(hivconsent = $reg_hh_vars firstday i.hhinterviewerID_50m) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen m_contact_p1_0 = p10/(p10+p00) if predGroup==2 su m_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Women Selection model xi: heckprob hiv $reg_ind_vars_w, sel(hivconsent = $reg_ind_vars_w i.interviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen w_consent_p1_0 = p10/(p10+p00) if predGroup==1 su w_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00** Women contact *Selection model xi: heckprob hiv $reg_hh_vars, sel(hivconsent = $reg_hh_vars firstday i.hhinterviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen w_contact_p1_0 = p10/(p10+p00) if predGroup==2 su w_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00 svyset hv001 [pweight=hhweight], strata(stratum)**Menreplace hiv = m_consent_p1_0 if sex==1 & predGroup==1replace hiv = m_contact_p1_0 if sex==1 & predGroup==2svy: mean hiv, subpop(if sex==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2)   /*Those who were not contacted*/**Womenreplace hiv = w_consent_p1_0 if sex==2 & predGroup==1replace hiv = w_contact_p1_0 if sex==2 & predGroup==2svy: mean hiv, subpop(if sex==2)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2)   /*Those who were not contacted*/*****************************************************************************************************************AGE SQUARED & CIRCUMCISION*Men Consent*Selection model xi: heckprob hiv $reg_ind_vars_m_agesq_cm, sel(hivconsent = $reg_ind_vars_m_agesq_cm i.interviewerID_50m) cluster(hv001) *After running the model, make conditional prediction: prob(HIV+ | No_Participation) = prob(HIV+ and No_Participation) / prob(No_Participation) predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen m_consent_p1_0 = p10/(p10+p00) if predGroup==1    /*The conditional prediction of HIV status among those without test*/ sum m_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00*Men Contact*Selection model xi:heckprob hiv $reg_hh_vars_agesq, sel(hivconsent = $reg_hh_vars_agesq firstday i.hhinterviewerID_50m) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen m_contact_p1_0 = p10/(p10+p00) if predGroup==2 su m_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00*Women Selection model xi: heckprob hiv $reg_ind_vars_w_agesq, sel(hivconsent = $reg_ind_vars_w_agesq i.interviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==1, p10 predict p00 if predGroup==1, p00 gen w_consent_p1_0 = p10/(p10+p00) if predGroup==1 su w_consent_p1_0 if predGroup==1   /*Predicted prevalence in those who refused consent*/ drop p10 p00** Women contact *Selection model xi: heckprob hiv $reg_hh_vars_agesq, sel(hivconsent = $reg_hh_vars_agesq firstday i.hhinterviewerID_50w) cluster(hv001) *Conditional prediction predict p10 if predGroup==2, p10 predict p00 if predGroup==2, p00 gen w_contact_p1_0 = p10/(p10+p00) if predGroup==2 su w_contact_p1_0 if predGroup==2   /*Predicted prevalence in those who were not contacted*/ drop p10 p00 svyset hv001 [pweight=hhweight], strata(stratum)**Menreplace hiv = m_consent_p1_0 if sex==1 & predGroup==1replace hiv = m_contact_p1_0 if sex==1 & predGroup==2svy: mean hiv, subpop(if sex==1)                  /*National estimate*/svy: mean hiv, subpop(if sex==1 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==1 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==1 & predGroup==2)   /*Those who were not contacted*/**Womenreplace hiv = w_consent_p1_0 if sex==2 & predGroup==1replace hiv = w_contact_p1_0 if sex==2 & predGroup==2svy: mean hiv, subpop(if sex==2)                  /*National estimate*/svy: mean hiv, subpop(if sex==2 & predGroup==0)   /*Complete case*/svy: mean hiv, subpop(if sex==2 & predGroup==1)   /*Those who refused consent*/svy: mean hiv, subpop(if sex==2 & predGroup==2)   /*Those who were not contacted*/*******************************************************************************************************************MULTIPLE IMPUTATION***/*For multiple imputation, you will need to create a new dataset using the following steps.Once you have created the dataset, please use the R code provided ("Estimating HIV Prevalence MI Goldberg, Moscoe, Postolovska.R") to replicate the multiple imputation results in R.*/ clear*use the dataset created from Barnighausen et al. (2011)keep hv001 hhweight age education wealthcat region location education wealthcat location region marital  ///	  std highhiv partner condom aidscare knowsdiedofaids evertestedHIV ///	  smoke alcohol religion ethnicity language stratum ///	  wage1sex mage1sex predGroup interviewerID_50w interviewerID_50m sex hiv hivconsentkeep if hivconsent==1 | hivconsent==0*Create unique IDgen id=_n*Drop missing interviewer IDdrop if interviewerID_50m==.*Save datasave "forimputation.dta", replace*******************************************************************************************************************Estimating prevalence from multiple imputationclear*insheet using "imp_data1.csv"svyset imp1hv001 [pweight=imp1hhweight], strata (imp1stratum)svy: mean imp1hiv**Mensvy: mean imp1hiv, subpop (if imp1sex=="male")                 /*National estimate*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="0")   /*Complete case*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="1")   /*Those who refused consent*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="2")   /*Those who were not contacted*/**Womensvy: mean imp1hiv, subpop(if imp1sex=="female")                  /*National estimate*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="0")   /*Complete case*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="1")   /*Those who refused consent*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="2")   /*Those who were not contacted*/***Prevalence among those that never had sexgen wage1sex_cat= 1 if imp1wage1sex==0replace wage1sex_cat=2 if imp1wage1sex<=15 & imp1wage1sex>0replace wage1sex_cat=3 if imp1wage1sex>15 & imp1wage1sex != .gen mage1sex_cat = 1 if imp1mage1sex==0replace mage1sex_cat = 2 if imp1mage1sex<=15 & imp1mage1sex>0replace mage1sex_cat = 3 if imp1mage1sex>15 & imp1mage1sex != .replace mage1sex=. if imp1mage1sex==99gen age1sex_cat = mage1sex_cat if imp1sex =="male"replace age1sex_cat = wage1sex_cat if imp1sex == "female"**Mensvy: mean imp1hiv, subpop (if imp1sex=="male" & age1sex_cat==1)                 /*National estimate*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="0" & age1sex_cat==1)   /*Complete case*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="1" & age1sex_cat==1)   /*Those who refused consent*/svy: mean imp1hiv, subpop(if imp1sex=="male" & imp1predgroup=="2" & age1sex_cat==1)   /*Those who were not contacted*/**Womensvy: mean imp1hiv, subpop(if imp1sex=="female" & age1sex_cat==1)                  /*National estimate*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="0" & age1sex_cat==1)   /*Complete case*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="1" & age1sex_cat==1)   /*Those who refused consent*/svy: mean imp1hiv, subpop(if imp1sex=="female" & imp1predgroup=="2" & age1sex_cat==1)   /*Those who were not contacted*/